##################
# This file replicates the numbers reported in the main manuscript.
# 08/18/2024
##################

#Page 15

#"There are a total of 2,628 tweets (3%) talking about Chinese actions in Xinjiang in the 82,110 tweets diplomatic corpus."
# Counts and shares below are plain sums over indicator columns of `mydata`.
# Assumes `xinjiang`, `threat`, and `bene_rule` are 0/1 (or logical) per-tweet
# flags without NAs -- TODO confirm against the data-preparation script.
sum(mydata$xinjiang) # 2628 tweets
sum(mydata$xinjiang) / nrow(mydata) # 3% of total tweets

#"Among these, Beijing used the threat strategy in 290 tweets (11%) and the benevolent rule strategy in 545 tweets (21%)."
sum(mydata$threat) # 290 tweets
sum(mydata$threat) / sum(mydata$xinjiang) # 11% of total xinjiang tweets

# The count line must be the raw sum; dividing it by the Xinjiang total would
# print the 21% share twice and never show the 545 figure.
sum(mydata$bene_rule) # 545 tweets
sum(mydata$bene_rule) / sum(mydata$xinjiang) # 21% of total xinjiang tweets

#"A Pearson correlation test between the threat strategy and the benevolent rule strategy yields a correlation coefficient of 0.11."
# Pearson correlation test between the two strategy indicators, computed over
# every row of `mydata` as written.
# `use = "pairwise.complete.obs"` was dropped: it is an argument of cor(), not
# cor.test(), so it was absorbed by `...` and silently ignored. cor.test()
# already restricts itself to complete (x, y) pairs, so the result is unchanged.
cor.test(mydata$threat, mydata$bene_rule, method = "pearson") # correlation coefficient of 0.11

#Page 14

#"Only 5 of the 88 Twitter accounts in the sample surpassed the 3,200 tweets threshold."
# Reduce the tweet-level data to unique (screen_name, statuses_count) pairs,
# then tabulate how many of them exceed the 3,200-tweet threshold.
# NOTE(review): de-duplication is on both columns, so an account whose
# statuses_count differs between rows would appear more than once -- confirm
# that statuses_count is constant within each account.
x <- mydata %>%
  dplyr::select(screen_name, statuses_count) %>%
  dplyr::distinct() # only the two selected columns remain, so this de-duplicates on both
table(x$statuses_count > 3200)
# ChinaEmbTurkey, CathayPak, AmbassadeChine, Chinaembmanila, and ChinaEmbOttawa have more than 3,200 tweets

#Page 13

#"Based on this study’s data, 96% of the tweets published by the Chinese Embassy in Brazil (@EmbaixadaChina) are in Portuguese."
# Frequency of each value in the language column across the whole corpus.
table(mydata$lang)

# Keep only the rows of the Chinese Embassy in Brazil (@EmbaixadaChina);
# which() discards NA comparisons, matching what subset() does.
x <- mydata[which(mydata$screen_name == "EmbaixadaChina"), ]
# Share of that account's tweets per language code ("pt" is Portuguese).
prop.table(table(x$lang))
